{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import config\n",
    "from model.cnn_document_model import DocumentModel\n",
    "from preprocessing.utils import Preprocess, remove_empty_docs\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from nltk.tokenize import sent_tokenize                                     \n",
    "\n",
    "import keras.backend as K"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load pre-trained IMDB model and data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Restore the saved IMDB document model: the definition file first, then its weights.\n",
    "architecture_path = config.MODEL_DIR+ '/imdb/model_02.json'\n",
    "weights_path = config.MODEL_DIR+ '/imdb/model_02.hdf5'\n",
    "imdb_model = DocumentModel.load_model(architecture_path)\n",
    "imdb_model.load_model_weights(weights_path)\n",
    "\n",
    "# Compile the classification model; later cells read the compiled model's\n",
    "# optimizer, total_loss, targets and sample_weights.\n",
    "model = imdb_model.get_classification_model()\n",
    "model.compile(optimizer='rmsprop', loss=\"binary_crossentropy\", metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# NOTE(review): `Loader` is not imported by the import cell of this notebook;\n",
    "# it has to be brought into scope before this cell can run on a fresh kernel.\n",
    "train_df = Loader.load_imdb_data(directory = 'train')\n",
    "print(train_df.shape)\n",
    "\n",
    "# Take reviews and labels as plain lists and pass both through remove_empty_docs.\n",
    "corpus, target = remove_empty_docs(train_df['review'].tolist(),\n",
    "                                   train_df['sentiment'].tolist())\n",
    "print(len(corpus))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preprocess input and compute document embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Fit the preprocessor on the cleaned corpus produced by the previous cell.\n",
    "preprocessor = Preprocess(corpus=corpus)\n",
    "preprocessor.fit()\n",
    "\n",
    "# Encode that same cleaned corpus. `corpus` and `target` are deliberately NOT\n",
    "# re-read from train_df here: doing so would throw away the remove_empty_docs\n",
    "# filtering and put empty documents back into x_train / y_train.\n",
    "corpus_to_seq = preprocessor.transform(corpus)\n",
    "\n",
    "x_train = np.array(corpus_to_seq)\n",
    "y_train = np.array(target)\n",
    "\n",
    "# Features and labels must stay aligned row for row.\n",
    "assert len(x_train) == len(y_train), 'x_train and y_train are misaligned'\n",
    "print(x_train.shape, y_train.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print('Evaluating Model ...')\n",
    "train_metrics = model.evaluate(x_train, y_train)\n",
    "print(train_metrics)\n",
    "\n",
    "# Model outputs for every training document.\n",
    "preds = model.predict(x_train)\n",
    "\n",
    "# Flip each prediction (1 - p); a later cell feeds this as the label\n",
    "# when evaluating the gradient function.\n",
    "pseudo_label = 1 - preds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gradient of the loss on the inverted output w.r.t. sentence embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Run the sentence model over the training set to get its outputs\n",
    "# (the learned sentence embeddings).\n",
    "sentence_ebd = imdb_model.get_sentence_model().predict(x_train)\n",
    "\n",
    "# Tensors the gradient function is fed with, in this order.\n",
    "input_tensors = [\n",
    "    model.inputs[0],          # input data\n",
    "    model.sample_weights[0],  # how much to weight each sample by\n",
    "    model.targets[0],         # labels\n",
    "]\n",
    "\n",
    "# Output tensor(s) of the sentence model: the variables we differentiate against.\n",
    "weights = imdb_model.get_sentence_model().outputs\n",
    "\n",
    "# Gradient of the total model loss w.r.t. the sentence-embedding outputs,\n",
    "# wrapped in a backend function that can be called with concrete values.\n",
    "gradients = model.optimizer.get_gradients(model.total_loss, weights)\n",
    "get_gradients = K.function(inputs=input_tensors, outputs=gradients)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "document_number = 2\n",
    "NUM_SUMMARY_SENTENCES = 4  # how many top-ranked sentences to print\n",
    "\n",
    "K.set_learning_phase(0)\n",
    "inputs = [[x_train[document_number]], # X\n",
    "          [1], # sample weights\n",
    "          [[pseudo_label[document_number][0]]], # y\n",
    "]\n",
    "grad = get_gradients(inputs)\n",
    "\n",
    "doc = corpus[document_number]\n",
    "label = y_train[document_number]\n",
    "prediction = preds[document_number]\n",
    "print(doc, label , prediction)\n",
    "\n",
    "sentences = sent_tokenize(doc)\n",
    "\n",
    "# Rank only slots that map to a real sentence. A document with fewer than\n",
    "# Preprocess.NUM_SENTENCES sentences would otherwise let a padded slot reach\n",
    "# the top ranks and make sentences[i] raise IndexError.\n",
    "# NOTE(review): assumes gradient row i corresponds to sentences[i] -- confirm\n",
    "# against how Preprocess pads/truncates documents.\n",
    "num_real_sentences = min(Preprocess.NUM_SENTENCES, len(sentences))\n",
    "\n",
    "# Score = negated L2 norm of the gradient at each sentence slot, so an\n",
    "# ascending sort puts the largest gradients first. (An alternative score is\n",
    "# the negated |dot product| of the gradient with the sentence embedding.)\n",
    "sent_score = [(i, -np.linalg.norm(grad[0][0][i])) for i in range(num_real_sentences)]\n",
    "sent_score.sort(key=lambda tup: tup[1])\n",
    "summary_sentences = [i for i, _ in sent_score[:NUM_SUMMARY_SENTENCES]]\n",
    "\n",
    "for i in summary_sentences:\n",
    "    print(i, sentences[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
